# -*- coding: utf-8 -*-
# @Date    : 2021/4/25
# @Author  : Maoxian
import os
import time
from urllib.parse import urljoin, urlparse

import requests
from lxml import etree

# Request headers shared by the HTTP calls in this module. The User-Agent
# string presents the client as Edge 90 on 64-bit Windows 10.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/90.0.4430.85 Safari/537.36 Edg/90.0.818.46"
}


def get_ppt_url(url):
    """Yield ``(title, detail_url)`` for each PPT entry on a listing page.

    The page is fetched, decoded as GBK and searched for the ``<li>``
    children of ``<ul class="posts clear">``. For each item the text and
    ``href`` of its second ``<a>`` child are used.

    Args:
        url: Address of the listing page to scan.

    Yields:
        Tuples ``(title, detail_url)`` where ``detail_url`` is the anchor's
        ``href`` resolved to an absolute URL against the fetched page.

    Raises:
        requests.RequestException: If the request fails, times out or the
            server answers with an HTTP error status.
    """
    # ``headers`` must be passed by keyword: the second positional parameter
    # of ``requests.get`` is ``params``, so a positional dict would be
    # appended to the query string instead of being sent as headers.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    response.encoding = 'gbk'

    html_element = etree.HTML(response.text)
    if html_element is None:
        # Nothing parseable came back, so there are no entries to report.
        return

    for item in html_element.xpath('//ul[@class="posts clear"]/li'):
        titles = item.xpath("./a[2]/text()")
        hrefs = item.xpath("./a[2]/@href")
        if not titles or not hrefs:
            # Skip list items that do not carry the expected second anchor
            # instead of aborting the whole listing with an IndexError.
            continue
        # urljoin copes with root-relative, page-relative and absolute hrefs.
        yield titles[0], urljoin(response.url, hrefs[0])


def download_ppt(name, url):
    """Download the file linked from a PPT detail page into ``ppt/``.

    The detail page is fetched and decoded as GBK, the first ``href`` found
    under ``<div class="button">`` is taken as the download link, and the
    linked file is saved as ``ppt/<name>.<extension>``.

    Args:
        name: Title used as the base of the saved file name. Characters that
            are not allowed in file names are replaced with ``_``.
        url: Address of the detail page that contains the download button.

    Raises:
        requests.RequestException: If either request fails, times out or
            returns an HTTP error status.
        IndexError: If the detail page contains no download link.
        OSError: If the target file cannot be written.
    """
    # Send the same headers as every other request in this module.
    page = requests.get(url, headers=headers, timeout=30)
    page.raise_for_status()
    page.encoding = 'gbk'

    tree = etree.HTML(page.text)
    links = tree.xpath('//div[@class="button"]/a/@href') if tree is not None else []
    if not links:
        raise IndexError(f'no download link found on {url}')
    # The href may be relative; resolve it against the page it came from.
    down_url = urljoin(page.url, links[0])

    # Read the extension from the URL path only, so a query string or a
    # dot in the host name cannot leak into the file name.
    extension = os.path.splitext(urlparse(down_url).path)[1]

    # Titles come from scraped HTML and may contain path separators or other
    # characters that are invalid in file names.
    forbidden = str.maketrans({char: '_' for char in '\\/:*?"<>|'})
    safe_name = name.strip().translate(forbidden)

    download = requests.get(down_url, headers=headers, timeout=30)
    # Fail loudly rather than saving an HTML error page as the presentation.
    download.raise_for_status()

    os.makedirs('ppt', exist_ok=True)
    with open(f'ppt/{safe_name}{extension}', 'wb') as f:
        f.write(download.content)


def main(url):
    """Download every PPT listed on the given listing page into ``ppt/``.

    Progress is printed for each entry. A failure on one entry is reported
    and the loop continues with the next one. The loop pauses for one
    second after every entry.

    Args:
        url: Address of the listing page passed to ``get_ppt_url``.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('ppt', exist_ok=True)

    for ppt_name, ppt_url in get_ppt_url(url):
        # Flush so the "started" message is visible while the download runs;
        # the line is only terminated by the status message printed below.
        print(f'开始下载: {ppt_name} {ppt_url}', end='\t', flush=True)
        try:
            download_ppt(ppt_name, ppt_url)
        except (requests.RequestException, IndexError, OSError) as exc:
            # One broken entry should not abort the rest of the batch.
            print(f'下载失败: {exc}')
        else:
            print('下载完成...')
        time.sleep(1)

# Script entry point: when run directly, process the listing page below.
if __name__ == '__main__':
    base_url = 'http://www.pptbz.com/pptmoban/jingmeippt/'
    main(base_url)
